# encoding:utf8
"""
@File        : pdd.py
@Time        : 2019/7/1 10:22
@Author      : zhaoy
@Email       : zhaoyao@shandiangou.cc
@Description :  Pinduoduo mobile app spider
"""
import json

import scrapy


class PddSpider(scrapy.Spider):
    """Spider for the Pinduoduo mobile-app goods listing API.

    Requests the ``fenlei_gyl_group`` listing endpoint on
    ``api.yangkeduo.com`` and yields each decoded JSON response as an item,
    enriched with the ``(short_name, image_url)`` pairs and the HTTP headers
    that should be used to download those images.
    """

    name = 'pdd'

    # Must cover the host queried in start_requests. The previous value
    # ('missfresh.cn') did not match any request this spider issues, so the
    # offsite filter could reject them.
    # NOTE(review): if the image pipeline downloads from other hosts and your
    # Scrapy version applies offsite filtering to those requests too, add the
    # image host(s) here -- confirm against the pipeline.
    allowed_domains = ['yangkeduo.com']

    # User-Agent of the Android app; shared by API and image requests.
    _USER_AGENT = (
        "android Mozilla/5.0 (Linux; Android 5.1.1; HUAWEI MLA-AL10 Build/HUAWEIMLA-AL10; wv) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36"
        "  phh_android_version/4.62.0 phh_android_build/068fc01b0f4a833d8da2e9c2ed76b587c6552d93 "
        "phh_android_channel/pp"
    )

    custom_settings = {
        'DEFAULT_REQUEST_HEADERS': {
            'Host': "api.yangkeduo.com",
            'Cookie': "api_uid=rBQGV10Uaju+ym04mwW+Ag==",
            'content-type': "application/json;charset=UTF-8",
            'referer': "Android",
            'x-pdd-queries': "width=900&height=1600&net=1&brand=Android&model=HUAWEI+"
                             "MLA-AL10&osv=5.1.1&appv=4.62.0&pl=2",
            'etag': "4DZFpqjj",
            'p-appname': "pinduoduo",
            'user-agent': _USER_AGENT,
            'pdd-config': "00102",
            'Accept': "*/*",
        },
        'IMAGES_STORE': 'G:/spiders_data/images/mrxs/',
        'ITEM_PIPELINES': {
            'baiguoyuan.pipelines.GoodsImagesPipeline': 1,
            'baiguoyuan.pipelines.CommomPipelines': 321
        },
    }

    def start_requests(self):
        """Yield one listing request per ``num`` value in 1..999.

        NOTE(review): ``offset`` is fixed at 50 in the base URL and only the
        appended ``num`` parameter varies; confirm the API really pages on
        ``num``, otherwise every request may return the same page.
        """
        url = ('https://api.yangkeduo.com/api/caterham/query/fenlei_gyl_group?size=50&opt_name=水果&offset=50&sort_type='
               'DEFAULT&flip=&list_id=13_22b4e43482&opt_type=1&support_types=0_3&opt_id=13&pdduid=')
        for i in range(1, 1000):
            yield scrapy.Request('%s&num=%s' % (url, i))

    def parse(self, response):
        """Decode a listing response and yield it as a single item.

        The decoded JSON object is yielded after being extended with:

        * ``data_collection`` / ``data_from``: fixed routing tags,
        * ``image_urls``: ``(short_name, image_url)`` tuples taken from the
          response's ``list`` entries (empty when ``list`` is missing/empty),
        * ``image_header``: headers to send when fetching the images.

        Responses that are not valid JSON, or whose top-level value is not a
        JSON object, are logged and skipped instead of raising.
        """
        try:
            data = json.loads(response.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            self.logger.error('Response is not valid JSON: %s', response.url)
            return

        if not isinstance(data, dict):
            # The item is built by updating the payload in place, so anything
            # other than a JSON object cannot be turned into an item.
            self.logger.error('Unexpected JSON payload type %s from %s',
                              type(data).__name__, response.url)
            return

        image_header = {
            'Accept': "image/*;q=0.8",
            'Accept-Language': "zh-cn",
            'User-Agent': self._USER_AGENT,
        }

        goods = data.get('list')
        image_urls = []
        if goods:
            self.logger.debug('goods list: %s', goods)
            image_urls = [(item.get('short_name'), item.get('image_url')) for item in goods]

        data.update({'data_collection': 'pdd_goods', 'data_from': 'list', 'image_urls': image_urls,
                     'image_header': image_header})
        yield data
